"""
Code to replicate the plots and tables from the MC results
"""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Module-wide seaborn styling, applied once at import time: a default
# figure size of (12, 9) and the "whitegrid" theme. Figures that pass their
# own figsize (see plot_classification) override the default size.
sns.set(rc={'figure.figsize':(12,9)})
sns.set_style("whitegrid")

def plot_beta(reg_type='single'):
    """
    Code to plot the MC results present in the Appendix
    Saves the plot in the plots folder

    Reads the ECC and BR coefficient files from '../results/MC Regression/',
    keeps the rows whose 'reg_type' column equals the requested type, and
    draws one box per coefficient and approach. Dashed horizontal reference
    lines are added, the figure is written to '../plots/' as PNG and EPS,
    and the current figure is cleared.

    Inputs
    ---------
    reg_type : 'single','glm' or 'multiple'

    Raises
    ---------
    ValueError : if reg_type is not one of the supported values
    """

    # Coefficient columns to plot and output file stem per regression type.
    plot_specs = {
        'single': (['Intercept','Y4'], 'figure_g2'),
        'multiple': (['Intercept','Y4','Y2','Y3'], 'figure_g3'),
        'glm': (['Intercept','Y3'], 'figure_g4'),
    }

    # Fail early and clearly. Without this check an unknown reg_type only
    # surfaced later as an UnboundLocalError, after both files were read.
    if reg_type not in plot_specs:
        raise ValueError(
            f"Unknown reg_type {reg_type!r}; expected one of {sorted(plot_specs)}"
        )

    coefficient_cols, save_name = plot_specs[reg_type]
    names_list = coefficient_cols + ['Approach','test_split']

    # (approach label, input file, mapping from approach-specific column
    # names to the shared names used on the x axis)
    sources = (
        ('ECC', '../results/MC Regression/ECC_beta.csv',
         {'Y4_cc':'Y4','Y2_cc':'Y2','Y3_cc':'Y3'}),
        ('BR', '../results/MC Regression/BR_beta.csv',
         {'Y4_br':'Y4','Y2_br':'Y2','Y3_br':'Y3'}),
    )

    long_frames = []
    for approach, path, renames in sources:
        beta = pd.read_csv(path)
        beta['Approach'] = approach
        beta = beta.rename(columns=renames)
        beta = beta.loc[beta['reg_type'] == reg_type, names_list]
        # Long format: one row per (coefficient, estimate) pair.
        long_frames.append(beta.melt(['Approach','test_split']))

    df = pd.concat(long_frames).rename(columns={'variable':'Variable', 'value':'Value'})

    sns.boxplot(x="Variable", y="Value",
            hue="Approach", palette='viridis',
            data=df)

    # Dashed reference lines that the boxes are compared against.
    if reg_type != 'glm':
        plt.axhline(1, ls='--',label='True Beta Value')
    else:
        plt.axhline(1.13, ls='--',label='Average of the True Intercept Term',color='red')
        plt.axhline(-2.26, ls='--',label='Average of the True Beta Term')
    plt.legend()

    plt.savefig(f'../plots/{save_name}.png')
    plt.savefig(f'../plots/{save_name}.eps', format='eps')
    # Clear the figure so the next plot starts from an empty canvas.
    plt.clf()

def generate_data_tables():
    """
    Code to generate the MC results present in the Appendix
    Saves the tables in the results folder

    For each of the three result sets ('CP', 'beta' and 'MSE') the ECC and
    BR files in '../results/MC Regression/' are read, the 'Unnamed: 0'
    column is dropped, and every remaining column is averaged within each
    (reg_type, test_split) group. The ECC and BR means are placed side by
    side (ECC columns first) and written to CP.csv, beta.csv and MSE.csv
    in the same folder.

    Takes no inputs and returns nothing.
    """

    group_keys = ['reg_type','test_split']

    for table_type in ('CP','beta','MSE'):
        # Group means for ECC first, then BR, so ECC columns come first.
        group_means = [
            pd.read_csv(f'../results/MC Regression/{approach}_{table_type}.csv')
            .drop(columns=['Unnamed: 0'])
            .groupby(group_keys)
            .mean()
            for approach in ('ECC','BR')
        ]
        pd.concat(group_means, axis=1).to_csv(
            f'../results/MC Regression/{table_type}.csv')


def finalize_tables():
    """
    Code to assemble the tables G2, G3 and G4 from the aggregated MC results
    Saves the tables in the results folder

    Reads CP.csv, beta.csv and MSE.csv from '../results/MC Regression/'.
    For each regression type the matching rows of the three files are
    indexed by (reg_type, test_split), every remaining column gets the
    suffix '_cp', '_beta' or '_mse', and the three pieces are joined side
    by side and written to table_g2.csv ('single', relabelled
    'univariate'), table_g3.csv ('multiple') or table_g4.csv ('glm').
    After each file is written the matching finalize_g2 / finalize_g3 /
    finalize_g4 function is called to post-process it.

    Takes no inputs and returns nothing.
    """

    dict_df = {
        'cp': pd.read_csv('../results/MC Regression/CP.csv'),
        'beta': pd.read_csv('../results/MC Regression/beta.csv'),
        'mse': pd.read_csv('../results/MC Regression/MSE.csv'),
    }

    # (reg_type value in the inputs, reg_type label written to the output,
    #  output file, post-processing step)
    table_specs = (
        ('single', 'univariate', '../results/MC Regression/table_g2.csv', finalize_g2),
        ('multiple', 'multiple', '../results/MC Regression/table_g3.csv', finalize_g3),
        ('glm', 'glm', '../results/MC Regression/table_g4.csv', finalize_g4),
    )

    for reg_type, label, out_path, finalize in table_specs:
        pieces = []
        for name, df in dict_df.items():
            # assign() returns a new frame, so the relabelling never writes
            # into a slice of df (the attribute assignment used before
            # triggered pandas' SettingWithCopyWarning).
            piece = df[df.reg_type == reg_type].assign(reg_type=label)
            piece = piece.set_index(['reg_type','test_split'])
            # Tag each column with its source table to keep names unique.
            piece.columns = [col+'_'+name for col in piece.columns]
            pieces.append(piece)
        pd.concat(pieces,axis=1).to_csv(out_path)
        finalize()
    
def finalize_g2():
    """
    Code to bring table_g2.csv into its final layout
    Overwrites '../results/MC Regression/table_g2.csv' in place

    For each of the first four rows of the input (one per experiment),
    three output rows are produced holding the beta, MSE and CP columns,
    in that order, rounded to 3 decimals. The rows of each experiment
    receive, in sequence, the labels 'Experiment k', 'Training x%' and
    'Testing y%'. Output columns are the ECC and BR values of the
    intercept and of Y4.
    """
    table_path = '../results/MC Regression/table_g2.csv'
    wide = pd.read_csv(table_path)

    # One group of input columns per output row: beta, then MSE, then CP.
    column_groups = (
        ['Intercept_beta','Intercept.1_beta','Y4_cc_beta','Y4_br_beta'],
        ['Intercept_mse','Intercept.1_mse','Y4_cc_mse','Y4_br_mse'],
        ['Intercept_cp','Intercept.1_cp','Y4_cc_cp','Y4_br_cp'],
    )

    # to_numeric turns the values picked from one row into a numeric
    # array before rounding.
    rounded_rows = [
        np.around(pd.to_numeric(wide.loc[row, columns].values), 3)
        for row in range(4)
        for columns in column_groups
    ]

    row_labels = [
        'Experiment 1', 'Training 85%', 'Testing 15%',
        'Experiment 2', 'Training 70%', 'Testing 30%',
        'Experiment 3', 'Training 55%', 'Testing 45%',
        'Experiment 4', 'Training 40%', 'Testing 60%',
    ]
    header = ['ECC Intercept', 'BR Intercept', 'ECC Y4', 'BR Y4']

    result = pd.DataFrame(rounded_rows, index=row_labels, columns=header)
    result.to_csv(table_path)

def finalize_g3():
    """
    Code to bring table_g3.csv into its final layout
    Overwrites '../results/MC Regression/table_g3.csv' in place

    For each of the first four rows of the input (one per experiment),
    three output rows are produced holding the beta, MSE and CP columns,
    in that order, rounded to 3 decimals. The rows of each experiment
    receive, in sequence, the labels 'Experiment k', 'Training x%' and
    'Testing y%'. Output columns are the ECC and BR values of the
    intercept, Y2, Y3 and Y4.
    """
    table_path = '../results/MC Regression/table_g3.csv'
    wide = pd.read_csv(table_path)

    # One group of input columns per output row: beta, then MSE, then CP.
    column_groups = (
        ['Intercept_beta','Intercept.1_beta','Y2_cc_beta','Y2_br_beta',
         'Y3_cc_beta','Y3_br_beta','Y4_cc_beta','Y4_br_beta'],
        ['Intercept_mse','Intercept.1_mse','Y2_cc_mse','Y2_br_mse',
         'Y3_cc_mse','Y3_br_mse','Y4_cc_mse','Y4_br_mse'],
        ['Intercept_cp','Intercept.1_cp','Y2_cc_cp','Y2_br_cp',
         'Y3_cc_cp','Y3_br_cp','Y4_cc_cp','Y4_br_cp'],
    )

    # to_numeric turns the values picked from one row into a numeric
    # array before rounding.
    rounded_rows = [
        np.around(pd.to_numeric(wide.loc[row, columns].values), 3)
        for row in range(4)
        for columns in column_groups
    ]

    row_labels = [
        'Experiment 1', 'Training 85%', 'Testing 15%',
        'Experiment 2', 'Training 70%', 'Testing 30%',
        'Experiment 3', 'Training 55%', 'Testing 45%',
        'Experiment 4', 'Training 40%', 'Testing 60%',
    ]
    header = ['ECC Intercept', 'BR Intercept', 'ECC Y2', 'BR Y2',
              'ECC Y3', 'BR Y3', 'ECC Y4', 'BR Y4']

    result = pd.DataFrame(rounded_rows, index=row_labels, columns=header)
    result.to_csv(table_path)
    
def finalize_g4():
    """
    Code to bring table_g4.csv into its final layout
    Overwrites '../results/MC Regression/table_g4.csv' in place

    For each of the first four rows of the input (one per experiment),
    three output rows are produced holding the beta, MSE and CP columns,
    in that order, rounded to 3 decimals. The rows of each experiment
    receive, in sequence, the labels 'Experiment k', 'Training x%' and
    'Testing y%'. Output columns are the ECC and BR values of the
    intercept and of Y3.
    """
    table_path = '../results/MC Regression/table_g4.csv'
    wide = pd.read_csv(table_path)

    # One group of input columns per output row: beta, then MSE, then CP.
    column_groups = (
        ['Intercept_beta','Intercept.1_beta','Y3_cc_beta','Y3_br_beta'],
        ['Intercept_mse','Intercept.1_mse','Y3_cc_mse','Y3_br_mse'],
        ['Intercept_cp','Intercept.1_cp','Y3_cc_cp','Y3_br_cp'],
    )

    # to_numeric turns the values picked from one row into a numeric
    # array before rounding.
    rounded_rows = [
        np.around(pd.to_numeric(wide.loc[row, columns].values), 3)
        for row in range(4)
        for columns in column_groups
    ]

    row_labels = [
        'Experiment 1', 'Training 85%', 'Testing 15%',
        'Experiment 2', 'Training 70%', 'Testing 30%',
        'Experiment 3', 'Training 55%', 'Testing 45%',
        'Experiment 4', 'Training 40%', 'Testing 60%',
    ]
    header = ['ECC Intercept', 'BR Intercept', 'ECC Y3', 'BR Y3']

    result = pd.DataFrame(rounded_rows, index=row_labels, columns=header)
    result.to_csv(table_path)


def tables_classification():
    """
    Code to generate the MC classification tables
    Saves the tables in the results folder

    Reads the BR and ECC classification results from
    '../results/MC Classification/', drops the 'Unnamed: 0' column and
    averages every remaining column per 'test_split' value. The BR means
    go to table_g1_part1.csv and the ECC means to table_g1_part2.csv.
    """
    # output file -> input file; BR first, then ECC.
    sources = {
        '../results/MC Classification/table_g1_part1.csv':
            '../results/MC Classification/BR_classification.csv',
        '../results/MC Classification/table_g1_part2.csv':
            '../results/MC Classification/ECC_classification.csv',
    }

    # Read and aggregate both inputs before anything is written.
    averaged = {}
    for target, source in sources.items():
        raw = pd.read_csv(source)
        raw = raw.drop(columns=['Unnamed: 0'])
        averaged[target] = raw.groupby('test_split').mean()

    for target, table in averaged.items():
        table.to_csv(target)



def plot_classification():
    """
    Code to generate the MC classification results present in the Appendix
    Saves the plot in the plots folder

    Reads the ECC and BR classification results from
    '../results/MC Classification/', names the first five columns of each
    file 'Accuracy', 'Hamming Loss', 'F1 Macro', 'F1 Micro' and
    'Ranking Loss' (by position), and draws a 2x2 grid of box plots:
    accuracy, the two F1 scores, Hamming loss and ranking loss, each
    split by approach. The figure is written to '../plots/' as
    figure_g1.png and figure_g1.eps and then closed.
    """
    metric_names = ['Accuracy','Hamming Loss','F1 Macro', 'F1 Micro', 'Ranking Loss']

    frames = []
    for approach, path in (
        ('ECC', '../results/MC Classification/ECC_classification.csv'),
        ('BR', '../results/MC Classification/BR_classification.csv'),
    ):
        results = pd.read_csv(path).drop(columns=['Unnamed: 0'])
        results['Approach'] = approach
        # Rename the first five columns by position. This used to be
        # written as list(df[:5]), which slices rows and only produced
        # the right mapping because zip() stops after the five new names.
        results = results.rename(columns=dict(zip(results.columns[:5], metric_names)))
        frames.append(results)

    df = pd.concat(frames)
    # Stack the two F1 columns so they can share one panel with a hue.
    df = df.melt(['Accuracy','Hamming Loss','Ranking Loss','test_split','Approach'])
    df = df.rename(columns={'value':'F1 Score','variable':'F1 Type'})

    fig, axes = plt.subplots(2, 2,figsize=(19, 12))
    sns.boxplot(x="Approach", y="Accuracy", palette='viridis',
                data=df, ax=axes[0,0])
    sns.boxplot(x="F1 Type", y="F1 Score", palette='viridis', hue = 'Approach',
                data=df, ax=axes[0,1])
    sns.boxplot(x="Approach", y="Hamming Loss", palette='viridis',
                data=df, ax=axes[1,0])
    sns.boxplot(x="Approach", y="Ranking Loss", palette='viridis',
                data=df, ax=axes[1,1])

    # NOTE(review): plt.legend() acts on pyplot's current axes, which is
    # presumably the last panel created rather than the F1 panel that
    # carries the hue. Kept as-is so the saved figure is unchanged;
    # confirm the intent.
    plt.legend()
    fig.savefig('../plots/figure_g1.png')
    fig.savefig('../plots/figure_g1.eps', format='eps')
    # Close (not just clear) the figure so it is released and does not
    # remain pyplot's current figure for later plots.
    plt.close(fig)
    
def main():
    """
    Regenerate every plot and table, in the order
    regression plots, classification plot, regression tables,
    classification tables.
    """
    #  regression plots, one per regression type
    for reg_type in ('single', 'multiple', 'glm'):
        plot_beta(reg_type)

    #  classification plots
    plot_classification()

    #  regression tables: aggregate first, then build the final layout
    generate_data_tables()
    finalize_tables()

    #  classification tables
    tables_classification()

# Run the full pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
